HW 03

Author

Nandakumar Kuthalaraja

if (!require("pacman")) 
  install.packages("pacman")

pacman::p_load(tidyverse,gghighlight,openintro,lubridate,palmerpenguins,glue,scales,countdown,ggthemes,gt,openintro,ggrepel,patchwork,quantreg,janitor,colorspace,broom,fs,here,dsbox,ggridges,gtable,ggimage,ggpubr,cowplot,png,sysfonts,showtext,grid)

# set theme for ggplot2
ggplot2::theme_set(ggplot2::theme_minimal(base_size = 14))

#Add Fonts as Needed
font_add("oldFont", "font/Wolfsmith.ttf")  
showtext_auto()

# set width of code output
options(width = 85)

# set figure parameters for knitr
knitr::opts_chunk$set(
  fig.width = 7,        # 7" width
  fig.asp = 0.618,      # the golden ratio
  fig.retina = 3,       # dpi multiplier for displaying HTML output on retina
  fig.align = "center", # center align figures
  dpi = 300             # higher dpi, sharper image
)

1 - Du Bois challenge.

# Read Data 
income <- read_csv("data/income.csv")

# Recode final class label for line break
income <- income |>
  mutate(
    Class = recode(Class, "$1000 AND OVER" = "1000\nAND OVER")
  )

# left table
label_table <- income |>
  mutate(
    Class = factor(Class, levels = rev(c(
      "$100-200", "$200-300", "$300-400", "$400-500",
      "$500-750", "$750-1000", "1000\nAND OVER"
    ))),
    row = as.numeric(Class)
  ) |>
  ggplot(aes(y = Class)) +
  geom_tile(aes(x = 1.5, width = 2, height = 1), fill = NA, color = "black") +
  geom_segment(
    data = tibble(y = seq(0.5, 7.5, 1)),
    aes(x = 0.9, xend = 2.9, y = y, yend = y),
    inherit.aes = FALSE,
    color = "gray7", linewidth = 0.2
  ) +
  geom_segment(
    data = tibble(x = c(0.9, 1.95, 2.9)),
    aes(x = x, xend = x, y = 0.5, yend = 7.5),
    inherit.aes = FALSE,
    color = "gray7", linewidth = 0.2
  ) +
  geom_text(aes(x = 1, label = Class), hjust = 0, size = 3.2, family = "oldFont") +
  geom_text(aes(x = 2, label = paste0("$", Average_Income)), hjust = 0, size = 3.2, family = "oldFont") +
  scale_y_discrete(limits = rev(c(
    "$100-200", "$200-300", "$300-400", "$400-500",
    "$500-750", "$750-1000", "1000\nAND OVER"
  ))) +
  xlim(0.9, 2.9) +
  labs(title = NULL, x = NULL, y = NULL) +
  theme_void() +
  theme(
    plot.margin = margin(5, 0, 5, 5),
    axis.text.y = element_blank(),
    plot.background = element_rect(fill = NA, color = NA),  
    panel.background = element_rect(fill = NA, color = NA)
  )

data <- income |>
  pivot_longer(cols = Rent:Other, names_to = "type", values_to = "measurement") |>
  mutate(
    type = factor(type, levels = c("Rent", "Food", "Clothes", "Tax", "Other")),
    Class = factor(Class, levels = rev(c(
      "$100-200", "$200-300", "$300-400", "$400-500",
      "$500-750", "$750-1000", "1000\nAND OVER"
    )))
  ) 
data$measurement[data$measurement == 0] <- 0.1

category <- c(
  Rent = "#000000",
  Food = "#8663A3",
  Clothes = "#E89A8D",
  Tax = "slategray3",
  Other = "#D6C8B0"
)

bar_plot <- ggplot(data, aes(x = Class, y = measurement, fill = type)) +
  geom_bar(stat = "identity", width = 0.8) +
  geom_text(
    aes(label = ifelse(measurement >= 1, paste0(measurement, "%"), "")),
    position = position_stack(vjust = 0.5),
    size = 3,
    color = "white"
  ) +
  scale_y_reverse() +
  scale_fill_manual(values = category) +
  coord_flip() +
  labs(
    title = "", 
  x = NULL,
  y = NULL,
  caption = "FOR FURTHER STATISTICS RAISE THIS FRAME."
  ) +
 
  theme(
    axis.text.y = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.major.x = element_blank(),
    legend.title = element_blank(),
    legend.position = "top",
    legend.direction = "horizontal",
    legend.justification = "center",
     plot.caption = element_text(
       family = "oldFont",
      hjust = 0.5,       
      size = 7,
      margin = margin(t = 10)
    ),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank()
  
  )


combined_plot <- (label_table + bar_plot + plot_layout(widths = c(1.3, 3)))

final_plot <- ggbackground(combined_plot, "images/parchment_paper-1074131_1920.png")
print(final_plot)

2 - COVID survey - interpret

This plot offers a comprehensive look at how medical and nursing students from various backgrounds responded to statements about the COVID-19 vaccine, using a Likert scale where lower scores signify greater agreement with positive vaccine sentiments

Asian students expressed more concern about safety/side effects of Vaccine

Seems Asian students have a higher mean score than most other groups. This reflects the dynamic Cultural aspects & maybe historical experiences with medical procedures.

Think it is little disappointing compared to, how they had high compliance with public health measures otherwise.

Black/African students report higher agreement recommending vaccine

Despite broader societal data showing lower vaccine confidence among Black Americans (due to historic medical mistreatment and systemic inequities), this subgroup shows Strong agreement with recommedning vaccine.

Only reason I believe this is as thy are healthcare students, who may have more access to scientific information.

Medical students tend to be more confident than nursing students

Medical students show slightly lower (more confident) scores in: The scientific vetting process & on trusting the information received

Medical students may have had more exposure to the bio-medical research or vaccine development process during training, resulting in higher trust in science. Agree with this assessment.

3 - COVID survey - reconstruct

covid <- read_csv("data/covid-survey.csv")
#dim(covid)
### Old Dimension
#Re-Aligning to row1
covid <- read_csv("data/covid-survey.csv", skip=1)
dim(covid)
[1] 1121   14
cols_to_check <- setdiff(names(covid), "response_id")
covid <- covid |>
  filter(!if_all(all_of(cols_to_check), is.na))

dim(covid)
[1] 1111   14
vax_levels <- c("0" = "No", "1" = "Yes")
profession_levels <- c("0" = "Medical", "1" = "Nursing")
gender_levels <- c("0" = "Male", "1" = "Female", "3" = "Non-binary third gender", "4" = "Prefer not to say")
race_levels <- c(
  "1" = "American Indian/Alaskan Native", 
  "2" = "Asian", 
  "3" = "Black/African American", 
  "4" = "Native Hawaiian/Other Pacific Islander", 
  "5" = "White"
)
ethnicity_levels <- c("1" = "Hispanic/Latino", "2" = "Non-Hispanic/Non-Latino")
age_bin_levels <- c("0" = "<20", "20" = "21-25", "25" = "26-30", "30" = ">30")

covid <- covid |>
  mutate(
    exp_already_vax = recode(exp_already_vax, !!!vax_levels),
    exp_flu_vax = recode(exp_flu_vax, !!!vax_levels),
    exp_profession = recode(exp_profession, !!!profession_levels),
    exp_gender = recode(exp_gender, !!!gender_levels),
    exp_race = recode(exp_race, !!!race_levels),
    exp_ethnicity = recode(exp_ethnicity, !!!ethnicity_levels),
    exp_age_bin = recode(exp_age_bin, !!!age_bin_levels)
  )
dim(covid)
[1] 1111   14
covid_survey_longer <- covid |>
  pivot_longer(
    cols = starts_with("exp_"),
    names_to = "explanatory",
    values_to = "explanatory_value"
  ) |>
  filter(!is.na(explanatory_value)) |>
  pivot_longer(
    cols = starts_with("resp_"),
    names_to = "response",
    values_to = "response_value"
  )

covid_survey_longer
# A tibble: 43,428 × 5
   response_id explanatory    explanatory_value response               response_value
         <dbl> <chr>          <chr>             <chr>                           <dbl>
 1           1 exp_profession Nursing           resp_safety                         5
 2           1 exp_profession Nursing           resp_confidence_scien…              2
 3           1 exp_profession Nursing           resp_concern_safety                 2
 4           1 exp_profession Nursing           resp_feel_safe_at_work              1
 5           1 exp_profession Nursing           resp_will_recommend                 1
 6           1 exp_profession Nursing           resp_trust_info                     1
 7           1 exp_flu_vax    Yes               resp_safety                         5
 8           1 exp_flu_vax    Yes               resp_confidence_scien…              2
 9           1 exp_flu_vax    Yes               resp_concern_safety                 2
10           1 exp_flu_vax    Yes               resp_feel_safe_at_work              1
# ℹ 43,418 more rows
covid_survey_longer <- covid_survey_longer |>
  mutate(response_value = as.numeric(response_value))

sapply(covid_survey_longer, class)
      response_id       explanatory explanatory_value          response 
        "numeric"       "character"       "character"       "character" 
   response_value 
        "numeric" 
covid_survey_summary <- covid_survey_longer |>
  group_by(explanatory, explanatory_value, response) |>
  summarise(mean = mean(response_value, na.rm = TRUE),
            low = quantile(response_value, 0.1, na.rm = TRUE),
            high = quantile(response_value, 0.9, na.rm = TRUE))
`summarise()` has grouped output by 'explanatory', 'explanatory_value'. You can
override using the `.groups` argument.
covid_survey_summary
# A tibble: 126 × 6
# Groups:   explanatory, explanatory_value [21]
   explanatory explanatory_value response                 mean   low  high
   <chr>       <chr>             <chr>                   <dbl> <dbl> <dbl>
 1 exp_age_bin 21-25             resp_concern_safety      3.32     2     5
 2 exp_age_bin 21-25             resp_confidence_science  1.31     1     2
 3 exp_age_bin 21-25             resp_feel_safe_at_work   1.20     1     2
 4 exp_age_bin 21-25             resp_safety              1.95     1     5
 5 exp_age_bin 21-25             resp_trust_info          1.29     1     2
 6 exp_age_bin 21-25             resp_will_recommend      1.09     1     1
 7 exp_age_bin 26-30             resp_concern_safety      3.35     1     5
 8 exp_age_bin 26-30             resp_confidence_science  1.40     1     2
 9 exp_age_bin 26-30             resp_feel_safe_at_work   1.29     1     2
10 exp_age_bin 26-30             resp_safety              2.16     1     5
# ℹ 116 more rows
covid_survey_summary_all <- covid_survey_longer |>
  group_by(response) |>
  summarise(mean = mean(response_value, na.rm = TRUE),
            low = quantile(response_value, 0.1, na.rm = TRUE),
            high = quantile(response_value, 0.9, na.rm = TRUE)
            ) |>
  mutate(
    explanatory = "All",
    explanatory_value = ""
  )

covid_survey_summary_all$explanatory_value <- as.factor(as.character(covid_survey_summary_all$explanatory_value))

sapply(covid_survey_summary_all, class)
         response              mean               low              high 
      "character"         "numeric"         "numeric"         "numeric" 
      explanatory explanatory_value 
      "character"          "factor" 
covid_survey_summary_all
# A tibble: 6 × 6
  response                 mean   low  high explanatory explanatory_value
  <chr>                   <dbl> <dbl> <dbl> <chr>       <fct>            
1 resp_concern_safety      3.28     1     5 All         ""               
2 resp_confidence_science  1.43     1     2 All         ""               
3 resp_feel_safe_at_work   1.36     1     2 All         ""               
4 resp_safety              2.03     1     5 All         ""               
5 resp_trust_info          1.40     1     2 All         ""               
6 resp_will_recommend      1.21     1     2 All         ""               
covid_survey_summary_stats <- bind_rows(covid_survey_summary_all, covid_survey_summary)

covid_survey_summary_stats
# A tibble: 132 × 6
   response                 mean   low  high explanatory explanatory_value
   <chr>                   <dbl> <dbl> <dbl> <chr>       <chr>            
 1 resp_concern_safety      3.28     1     5 All         ""               
 2 resp_confidence_science  1.43     1     2 All         ""               
 3 resp_feel_safe_at_work   1.36     1     2 All         ""               
 4 resp_safety              2.03     1     5 All         ""               
 5 resp_trust_info          1.40     1     2 All         ""               
 6 resp_will_recommend      1.21     1     2 All         ""               
 7 resp_concern_safety      3.32     2     5 exp_age_bin "21-25"          
 8 resp_confidence_science  1.31     1     2 exp_age_bin "21-25"          
 9 resp_feel_safe_at_work   1.20     1     2 exp_age_bin "21-25"          
10 resp_safety              1.95     1     5 exp_age_bin "21-25"          
# ℹ 122 more rows
covid_survey_summary_stats <- covid_survey_summary_stats |>
  mutate(
    explanatory_value = fct_relevel(explanatory_value, ">30", "26-30", "21-25", "<20"),
    explanatory_value = fct_relevel(explanatory_value, "Female", "Male", "Non-binary third gender", "Prefer not to say"),
    explanatory_value = fct_relevel(explanatory_value, "American Indian/Alaskan Native", "Asian", "Black/African American", "Native Hawaiian/Other Pacific Islander", "White"),
    explanatory_value = fct_relevel(explanatory_value, "Non-Hispanic/Non-Latino", "Hispanic/Latino"),
    explanatory_value = fct_relevel(explanatory_value, "Medical", "Nursing"),
    explanatory_value = fct_relevel(explanatory_value, "No", "Yes")
  )


covid_survey_summary_stats <- covid_survey_summary_stats |>
  mutate(
    explanatory = fct_relevel(explanatory,
      "All",
      "exp_age_bin", "exp_gender", "exp_race", "exp_ethnicity",
      "exp_profession", "exp_already_vax", "exp_flu_vax"
    )
  )

covid_survey_summary_stats <- covid_survey_summary_stats |>
  mutate(
    explanatory = recode(explanatory,
      "exp_age_bin" = "Age",
      "exp_gender" = "Gender",
      "exp_race" = "Race",
      "exp_ethnicity" = "Ethnicity",
      "exp_profession" = "Profession",
      "exp_already_vax" = "Had COVID vaccine",
      "exp_flu_vax" = "Had flu vaccine this year"
    ),
    response = fct_relevel(response,
      "resp_safety",
      "resp_feel_safe_at_work",
      "resp_concern_safety",
      "resp_confidence_science",
      "resp_trust_info",
      "resp_will_recommend"
    ),
    response = recode(response,
      "resp_safety" = "I believe the vaccine is safe",
      "resp_feel_safe_at_work" = "The vaccine makes me feel safer at work",
      "resp_concern_safety" = "I'm concerned about safety and side effects",
      "resp_confidence_science" = "I trust the scientific vetting process",
      "resp_trust_info" = "I trust the info I received about the vaccine",
      "resp_will_recommend" = "I will recommend the vaccine to others"
    )
  )

covid_survey_summary_stats
# A tibble: 132 × 6
   response                            mean   low  high explanatory explanatory_value
   <fct>                              <dbl> <dbl> <dbl> <fct>       <fct>            
 1 I'm concerned about safety and si…  3.28     1     5 All         ""               
 2 I trust the scientific vetting pr…  1.43     1     2 All         ""               
 3 The vaccine makes me feel safer a…  1.36     1     2 All         ""               
 4 I believe the vaccine is safe       2.03     1     5 All         ""               
 5 I trust the info I received about…  1.40     1     2 All         ""               
 6 I will recommend the vaccine to o…  1.21     1     2 All         ""               
 7 I'm concerned about safety and si…  3.32     2     5 Age         "21-25"          
 8 I trust the scientific vetting pr…  1.31     1     2 Age         "21-25"          
 9 The vaccine makes me feel safer a…  1.20     1     2 Age         "21-25"          
10 I believe the vaccine is safe       1.95     1     5 Age         "21-25"          
# ℹ 122 more rows
ggplot(covid_survey_summary_stats, aes(x = mean, y = explanatory_value)) +
  geom_point(size = 0.85) +
  geom_errorbarh(aes(xmin = low, xmax = high), height = 0.3) +
  facet_grid(
    rows = vars(explanatory),
    cols = vars(response),
    scales = "free_y",
    space = "free_y",
    labeller = labeller(
      explanatory = label_wrap_gen(15),
      response = label_wrap_gen(15)
    )
  ) +
  scale_x_continuous(breaks = 1:5) +
  labs(
    x = "Mean Likert score\n(Error bars show 10th–90th percentile)",
    y = NULL
  ) +
  theme(
    strip.text = element_text(size = 6),
    strip.text.y = element_text(angle = 0),
    axis.text.x = element_text(size = 6),
    axis.text.y = element_text(size = 6),
    axis.title.x = element_text(size = 8),
    panel.spacing = unit(0.2, "lines"),
    panel.grid = element_blank(),
    strip.background = element_rect(fill = "gray90", color = "black")
  )

4 - COVID survey - re-reconstruct

5 - COVID survey - another view